map <-
read_csv("data/HydroWASTE_v10.csv")
## Rows: 58502 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): WWTP_NAME, COUNTRY, CNTRY_ISO, STATUS, LEVEL
## dbl (20): WASTE_ID, SOURCE, ORG_ID, LAT_WWTP, LON_WWTP, QUAL_LOC, LAT_OUT, L...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(map) %>%
knitr::kable(digits = 3) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"), font_size = 12) %>%
kableExtra::scroll_box(width = "100%", height = "300px")
| WASTE_ID | SOURCE | ORG_ID | WWTP_NAME | COUNTRY | CNTRY_ISO | LAT_WWTP | LON_WWTP | QUAL_LOC | LAT_OUT | LON_OUT | STATUS | POP_SERVED | QUAL_POP | WASTE_DIS | QUAL_WASTE | LEVEL | QUAL_LEVEL | DF | HYRIV_ID | RIVER_DIS | COAST_10KM | COAST_50KM | DESIGN_CAP | QUAL_CAP |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1 | 1140441 | Akmenes aglomeracija | Lithuania | LTU | 56.247 | 22.726 | 2 | 56.223 | 22.627 | Not Reported | 1060 | 2 | 148.213 | 4 | Advanced | 1 | 2421.974 | 20228874 | 4.153 | 0 | 0 | 4600 | 2 |
| 2 | 1 | 1140443 | Alytaus m aglomeracija | Lithuania | LTU | 54.432 | 24.056 | 2 | 54.519 | 24.098 | Not Reported | 87900 | 2 | 8797.904 | 1 | Advanced | 1 | 2534.527 | 20261585 | 257.983 | 0 | 0 | 220000 | 2 |
| 3 | 1 | 1140445 | Anyksciu aglomeracija | Lithuania | LTU | 55.509 | 25.073 | 2 | 55.452 | 25.006 | Not Reported | 12400 | 2 | 1959.285 | 1 | Advanced | 1 | 1367.809 | 20243105 | 30.995 | 0 | 0 | 33000 | 2 |
| 4 | 1 | 1140447 | Ariogalos aglomeracija | Lithuania | LTU | 55.252 | 23.484 | 2 | 55.210 | 23.510 | Not Reported | 2500 | 2 | 578.482 | 1 | Secondary | 1 | 2061.969 | 20247446 | 13.799 | 0 | 0 | 4357 | 2 |
| 5 | 1 | 1140449 | Baisogalos aglomeracija | Lithuania | LTU | 55.644 | 23.741 | 2 | 55.681 | 23.835 | Not Reported | 1200 | 2 | 167.788 | 4 | Secondary | 1 | 209.549 | 20239330 | 0.405 | 0 | 0 | 1490 | 2 |
| 6 | 1 | 1140451 | Birstono Prienu aglomeracija | Lithuania | LTU | 54.623 | 24.062 | 2 | 54.715 | 24.094 | Not Reported | 12400 | 2 | 2239.471 | 1 | Advanced | 1 | 10366.240 | 20256987 | 268.665 | 0 | 0 | 19000 | 2 |
wwtp <-
read_csv("data/SARS-CoV-2_concentrations_measured_in_NYC_Wastewater_20231129.csv")
## Rows: 4270 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Sample Date, Test date, WRRF Name, WRRF Abbreviation, Annotation, T...
## num (3): Concentration SARS-CoV-2 gene target (N1 Copies/L), Per capita SARS...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(wwtp) %>%
knitr::kable(digits = 3) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"), font_size = 12) %>%
kableExtra::scroll_box(width = "100%", height = "300px")
| Sample Date | Test date | WRRF Name | WRRF Abbreviation | Concentration SARS-CoV-2 gene target (N1 Copies/L) | Per capita SARS-CoV-2 load (N1 copies per day per population) | Annotation | Population Served, estimated | Technology |
|---|---|---|---|---|---|---|---|---|
| 08/31/2020 | 09/01/2020 | 26th Ward | 26W | 389 | 263535.6 | Concentration below Method Limit of Quantification (above Method Limit of Detection) | 290608 | RT-qPCR |
| 08/31/2020 | 09/01/2020 | Bowery Bay | BB | 1204 | 443632.9 | NA | 924695 | RT-qPCR |
| 08/31/2020 | 09/01/2020 | Coney Island | CI | 304 | 168551.6 | Concentration below Method Limit of Quantification (above Method Limit of Detection) | 682342 | RT-qPCR |
| 08/31/2020 | 09/01/2020 | Hunts Point | HP | 940 | 574446.6 | NA | 755948 | RT-qPCR |
| 08/31/2020 | 09/01/2020 | Jamaica Bay | JA | 632 | 233077.7 | NA | 748737 | RT-qPCR |
| 08/31/2020 | 09/01/2020 | Newtown Creek | NC | 197 | 122396.8 | Concentration below Method Limit of Quantification (above Method Limit of Detection) | 1156473 | RT-qPCR |
map_df <-
map |>
janitor::clean_names() |>
select(waste_id, wwtp_name, lat_wwtp, lon_wwtp) |>
mutate(wrrf_abbreviation = recode( wwtp_name,
"New York C Rockaway WPCP" = "RK",
"New York C Red Hook WPCP" = "RH",
"New York C Port Richmond WPCP" = "PR",
"New York C Oakwood Beach WPCP" = "OB",
"New York C 26th Ward WPCP" = "26W",
"New York C Tallman Island WPCP" = "TI",
"New York C North River WPCP" = "NR",
"New York C Coney Island WPCP" = "CI",
"New York C Jamaica WPCP" = "JA",
"New York C Hunts Point WPCP" = "HP",
"New York C Owls Head WPCP" = "OH",
"New York C Bowery Bay WPCP" = "BB",
"New York C Newtown Creek WPCP" = "NC",
"New York C Wards Island WPCP" = "WI",
))
wwtp_df <-
wwtp |>
janitor:: clean_names()
merge_df <- inner_join(map_df, wwtp_df, by = "wrrf_abbreviation")
nyc_wastewater <-
merge_df |>
rename(concentration = concentration_sars_co_v_2_gene_target_n1_copies_l) |>
drop_na(concentration) |>
separate(sample_date, into = c("month", "day", "year"), convert = TRUE) %>%
mutate(
year = as.character(year),
month = factor(month, levels = 1:12),
month = recode(month,
"1" = "January",
"2" = "February",
"3" = "March",
"4" = "April",
"5" = "May",
"6" = "June",
"7" = "July",
"8" = "August",
"9" = "September",
"10" = "October",
"11" = "November",
"12" = "December")) |>
select(-waste_id, -test_date, -per_capita_sars_co_v_2_load_n1_copies_per_day_per_population, -population_served_estimated, -wwtp_name, -annotation, -wrrf_abbreviation)
head(nyc_wastewater) %>%
knitr::kable(digits = 3) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"), font_size = 12) %>%
kableExtra::scroll_box(width = "100%", height = "300px")
| lat_wwtp | lon_wwtp | month | day | year | wrrf_name | concentration | technology |
|---|---|---|---|---|---|---|---|
| 40.788 | -73.923 | August | 31 | 2020 | Wards Island | 1339 | RT-qPCR |
| 40.788 | -73.923 | September | 8 | 2020 | Wards Island | 1065 | RT-qPCR |
| 40.788 | -73.923 | September | 13 | 2020 | Wards Island | 394 | RT-qPCR |
| 40.788 | -73.923 | September | 15 | 2020 | Wards Island | 316 | RT-qPCR |
| 40.788 | -73.923 | September | 20 | 2020 | Wards Island | 1250 | RT-qPCR |
| 40.788 | -73.923 | October | 6 | 2020 | Wards Island | 1152 | RT-qPCR |
Creating the dataset for an overall trend for Covid-19 concentration across the 14 waste water facilities within New York State from 2020 to 2023.
overall_trend <-
nyc_wastewater |>
filter(technology == "RT-qPCR") |>
select(-lat_wwtp, -lon_wwtp) |>
group_by(year, month) |>
summarise(avg_conc = mean(concentration)) |>
unite("year_month",year, month, sep = "_") |>
mutate(month_avg = ymd(paste0(year_month, "_01")))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
Plotting Scatterplot
overall_trend |>
ggplot(aes(x = month_avg, y = avg_conc)) +
geom_point(size = 3, alpha = .7) +
geom_smooth(size = 2, se = FALSE, alpha = .3) +
labs(
x = "Time",
y = "Monthly average Covid-19 concentration in waste water in NY (N/L)") +
theme(axis.line = element_line(color = "grey"),
panel.background = element_blank(),
legend.position = "none",
panel.grid.major = element_line(color = "light grey", linetype = "dashed"),
plot.title = element_text(hjust = 0.5)) +
ggtitle("Monthly average Covid-19 concentration in waste water in NY measured from 2020 to 2023")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Sub dataset
rtqpcr <-
nyc_wastewater |>
filter(year %in% c(2021, 2022), technology == "RT-qPCR") |>
select(-lat_wwtp, -lon_wwtp)
dpcr <-
nyc_wastewater |>
filter(year %in% c(2021, 2022), technology == "dPCR") |>
select(-lat_wwtp, -lon_wwtp)
dpcr is not large enough to do data analysis.
data analysis
spaghetti_plot_2021 <-
rtqpcr |>
filter(year == 2021) |>
group_by(month, wrrf_name) |>
summarise(avg_conc = mean(concentration)) |>
ggplot(aes(x = month, y = avg_conc, color = wrrf_name, group = wrrf_name)) +
geom_line(alpha = .5) +
geom_point(alpha = .5) +
labs(x = "Month",
y = "Average Covid-19 Concentration of each waste water facility",
title = "Average Covid-19 Concentration of each waste water facility in 2021 in New York State") +
theme(legend.position = "bottom",
legend.box.background = element_rect())
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
spaghetti_plot_2021
trend_plot_2022
spaghetti_plot_2022 <-
rtqpcr |>
filter(year == 2022) |>
group_by(month, wrrf_name) |>
summarise(avg_conc = mean(concentration)) |>
ggplot(aes(x = month, y = avg_conc, color = wrrf_name, group = wrrf_name)) +
geom_line(alpha = .5) +
geom_point(alpha = .5) +
labs(x = "Month",
y = "Average Covid-19 Concentration of each waste water facility",
title = "Average Covid-19 Concentration of each waste water facility in 2022 in New York State") +
theme(legend.position = "bottom",
legend.box.background = element_rect())
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
spaghetti_plot_2022
tables 2021
box_plot_2021 <- rtqpcr %>%
filter(year == 2021) %>%
group_by(month, wrrf_name) %>%
summarise(avg_conc = mean(concentration)) %>%
plot_ly(
x = ~avg_conc,
y = ~wrrf_name,
type = "box",
color = ~wrrf_name,
colors = "viridis"
) %>%
layout(
xaxis = list(title = "Mean concentration (N/L)"),
yaxis = list(title = "Area"),
showlegend = FALSE
)
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
box_plot_2021
tables 2022
box_plot_2022 <- rtqpcr %>%
filter(year == 2022) %>%
group_by(month, wrrf_name) %>%
summarise(avg_conc = mean(concentration)) %>%
plot_ly(
x = ~avg_conc,
y = ~wrrf_name,
type = "box",
color = ~wrrf_name,
colors = "viridis"
) %>%
layout(
xaxis = list(title = "Mean concentration (N/L)"),
yaxis = list(title = "Area"),
showlegend = FALSE
)
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
box_plot_2021
##Questions 1 The concentration of covid for each station on average 2 literature review foe two diff tech (RT-qPCR, dPCR) 3 Year 2021 and 2022